	.file	"wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
 |                                                                           |
 | Call from C as:                                                           |
 |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_asm.h"


.data
/*
	Local storage:
 */
	.align 4,0
accum_3:
	.long	0		// ms word
accum_2:
	.long	0
accum_1:
	.long	0
accum_0:
	.long	0

// The de-normalised argument:
//                  sq_2                  sq_1              sq_0
//        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
//           ^ binary point here
fsqrt_arg_2:
	.long	0		// ms word
fsqrt_arg_1:
	.long	0
fsqrt_arg_0:
	.long	0		// ls word, at most the ms bit is set

.text
	.align 2,144

.globl wm_sqrt

wm_sqrt:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi

	movl	SIGH(%esi),%eax
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

// We use a rough linear estimate for the first guess..

	cmpl	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
// From here on, n is never accessed directly again until it is
// replaced by the answer.

	movl	%eax,fsqrt_arg_2		// ms word of n
	movl	%ecx,fsqrt_arg_1
	movl	%edx,fsqrt_arg_0

// Make a linear first estimate
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx
	mull	%ecx
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	// Our first guess

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	fsqrt_arg_2,%ecx	// ms word

// From our initial estimate, three iterations are enough to get us
// to 30 bits or so. This will then allow two iterations at better
// precision to complete the process.

// Compute  (g + n/g)/2  at each iteration (g is the guess).
	shrl	%ecx		// Doing this first will prevent a divide
				// overflow later.

	movl	%ecx,%edx	// msw of the arg / 2
	divl	%esi		// current estimate
	shrl	%esi		// divide by 2
	addl	%eax,%esi	// the new estimate

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

// Now that an estimate accurate to about 30 bits has been obtained (in %esi),
// we improve it to 60 bits or so.

// The strategy from now on is to compute new estimates from
//      guess := guess + (n - guess^2) / (2 * guess)

// First, find the square of the guess
	movl	%esi,%eax
	mull	%esi
// guess^2 now in %edx:%eax

	movl	fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	fsqrt_arg_2,%ecx	// ms word of normalized n
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive

// subtraction gives a negative result
// negate the result before division
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx		// divide by 2
	rcrl	$1,%eax

	// Form the new estimate in %esi:%edi
	movl	%eax,%edi
	addl	%ecx,%esi

	jnz	sqrt_stage_2_done	// result should be [1..2)

#ifdef PARANOID
// It should be possible to get here only if the arg is ffff....ffff
	cmp	$0xffffffff,fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif PARANOID

// The best rounded result.
	xorl	%eax,%eax
	decl	%eax
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif PARANOID

sqrt_stage_2_done:

// Now the square root has been computed to better than 60 bits

// Find the square of the guess
	movl	%edi,%eax		// ls word of guess
	mull	%edi
	movl	%edx,accum_1

	movl	%esi,%eax
	mull	%esi
	movl	%edx,accum_3
	movl	%eax,accum_2

	movl	%edi,%eax
	mull	%esi
	addl	%eax,accum_1
	adcl	%edx,accum_2
	adcl	$0,accum_3

//	movl	%esi,%eax
//	mull	%edi
	addl	%eax,accum_1
	adcl	%edx,accum_2
	adcl	$0,accum_3

// guess^2 now in accum_3:accum_2:accum_1

	movl	fsqrt_arg_0,%eax		// get normalized n
	subl	%eax,accum_1
	movl	fsqrt_arg_1,%eax
	sbbl	%eax,accum_2
	movl	fsqrt_arg_2,%eax		// ms word of normalized n
	sbbl	%eax,accum_3
	jnc	sqrt_stage_3_positive

// subtraction gives a negative result
// negate the result before division
	notl	accum_1
	notl	accum_2
	notl	accum_3
	addl	$1,accum_1
	adcl	$0,accum_2

#ifdef PARANOID
	adcl	$0,accum_3	// This must be zero
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif PARANOID

	movl	accum_2,%edx
	movl	accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		// divide by 2
	rcrl	$1,%eax

	// prepare to round the result

	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	accum_2,%edx
	movl	accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		// divide by 2
	rcrl	$1,%eax

	// prepare to round the result

	notl	%eax		// Negate the correction term
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		// carry here ==> correction == 0
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

// The result in %esi:%edi:%esi should be good to about 90 bits here,
// and the rounding information here does not have sufficient accuracy
// in a few rare cases.
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
// Set up for rounding operations
	movl	%eax,%edx
	movl	%esi,%eax
	movl	%edi,%ebx
	movl	PARAM1,%edi
	movl	EXP_BIAS,EXP(%edi)	// Result is in  [1.0 .. 2.0)
	movl	PARAM2,%ecx
	jmp	FPU_round_sqrt


sqrt_near_exact_x:
// First, the estimate must be rounded up.
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
// This is an easy case because x^1/2 is monotonic.
// We need just find the square of our estimate, compare it
// with the argument, and deduce whether our estimate is
// above, below, or exact. We use the fact that the estimate
// is known to be accurate to about 90 bits.
	movl	%edi,%eax		// ls word of guess
	mull	%edi
	movl	%edx,%ebx		// 2nd ls word of square
	movl	%eax,%ecx		// ls word of square

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

#ifdef PARANOID
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif PARANOID

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	or	%ebx,%edx
	jnz	sqrt_near_exact_large

// Our estimate is exactly the right answer
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
// Our estimate is too small
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result
	
sqrt_near_exact_large:
// Our estimate is too large, we need to decrement it
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
// This case is almost the same as the above, except we start
// with an extra bit of precision in the estimate.
	stc			// The extra bit.
	rcll	$1,%edi		// Shift the estimate left one bit
	rcll	$1,%esi

	movl	%edi,%eax		// ls word of guess
	mull	%edi
	movl	%edx,%ebx		// 2nd ls word of square
	movl	%eax,%ecx		// ls word of square

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

// Put our estimate back to its original value
	stc			// The ms bit.
	rcrl	$1,%esi		// Shift the estimate left one bit
	rcrl	$1,%edi

#ifdef PARANOID
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif PARANOID

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	or	%ebx,%ecx
	jnz	sqrt_more_prec_large

// Our estimate is exactly the right answer
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
// Our estimate is too small
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result
	
sqrt_more_prec_large:
// Our estimate is too large
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
